library(tidyverse)
library(lubridate)
# Load the raw Goodreads export.
goodreads_raw <- read_csv("/Volumes/GoogleDrive/My Drive/data_stuff/R/goodreads/gr_04_20_22.csv")

# Clean the export: give the columns short snake_case names, strip the
# ="..." wrapper from ISBN13, and split `Author l-f` into separate
# first/last name columns.
goodreads <- goodreads_raw %>%
  transmute(
    book_id = `Book Id`,
    title = `Title`,
    author = `Author l-f`,
    add_authors = `Additional Authors`,
    # Remove a leading =" and a trailing " from the ISBN; an ISBN that is
    # empty after stripping becomes NA.
    isbn13 = str_remove(ISBN13, "^=\""),
    isbn13 = str_remove(isbn13, "\"$"),
    isbn13 = na_if(isbn13, ""),
    my_rating = `My Rating`,
    avg_rating = `Average Rating`,
    n_pages = `Number of Pages`,
    publication_y = `Original Publication Year`,
    date_read = `Date Read`,
    date_added = `Date Added`,
    # Factor levels follow the order in which shelves first appear in the file.
    shelf = factor(`Exclusive Shelf`, levels = unique(`Exclusive Shelf`)),
    review = `My Review`,
    # Kept temporarily so it can be split below; separate() drops it.
    `Author l-f`
  ) %>%
  # `Author l-f` is taken to be "Last, First" (per the "l-f" in its name), so
  # the piece before the comma is the last name.
  # NOTE(review): confirm against the export file.
  # extra = "merge" keeps anything after the first comma together instead of
  # discarding it; fill = "right" yields NA (without a warning) for the first
  # name when there is no comma.
  separate(
    `Author l-f`,
    into = c("author_last", "author_first"),
    sep = ",",
    extra = "merge",
    fill = "right"
  ) %>%
  # Name the results so the normalised values overwrite the existing columns
  # rather than being added as new, expression-named columns.
  mutate(
    author_first = tolower(str_trim(author_first)),
    author_last = tolower(str_trim(author_last))
  ) %>%
  # Keep the column order author_first, author_last.
  relocate(author_first, .before = author_last)
# Transform read data: keep only books that have a finish date, order them
# chronologically, and use the previous book's finish date as this book's
# start date.
gr_read <- goodreads %>%
  filter(!is.na(date_read)) %>%
  # Parse before sorting so the ordering is by date, whatever type the raw
  # column was read as.
  mutate(date_read = ymd(date_read)) %>%
  arrange(date_read) %>%
  transmute(
    rn = row_number(),
    book_id,
    title,
    author_first,
    author_last,
    n_pages,
    # The first book has no predecessor, so its start date is NA.
    date_start = lag(date_read),
    date_read,
    year_read = year(date_read),
    # Whole days between start and finish. Converting the difftime directly
    # avoids truncating durations of 1000+ days to their first three digits.
    duration_days = as.integer(
      difftime(date_read, date_start, units = "days")
    ),
    # A missing duration (first book) or a zero duration (same finish date as
    # the previous book) is counted as one day.
    duration_days = pmax(coalesce(duration_days, 1L), 1L)
  )
# Per-year summary of finished books: the mean of duration_days (ignoring
# NAs) and the number of rows for each year_read.
year_gr_read <- summarise(
  group_by(gr_read, year_read),
  mean = mean(duration_days, na.rm = TRUE),
  count = n()
)
library(plotly)
# DONE: hover text shows the book, start date, and end date
# average duration in days by year
# color by genre
# Timeline plot: one horizontal line per row of gr_read, drawn from
# date_start to date_read at height rn, with the book's title, dates and
# duration as hover text.
p <- plot_ly()
# seq_len() visits every row (the previous 1:(n - 1) range skipped the last
# book) and does nothing when gr_read is empty.
for (i in seq_len(nrow(gr_read))) {
  p <- add_trace(
    p,
    x = c(gr_read$date_start[i], gr_read$date_read[i]),
    y = c(gr_read$rn[i], gr_read$rn[i]),
    mode = "lines",
    type = "scatter",
    line = list(width = 10),
    # Disabled alternatives kept from the original draft:
    # facet_col = 'year_read',
    # line = list(color = df$color[i], width = 20),
    showlegend = FALSE,
    hoverinfo = "text",
    text = paste(
      "Book: ", gr_read$title[i], "<br>",
      "Start Date: ", gr_read$date_start[i], "<br>",
      "End Date: ", gr_read$date_read[i], "<br>",
      "Duration: ", gr_read$duration_days[i], " Days"
    )
  )
}
p <- p %>% layout(
  title = "Books Read by Date and Read Time",
  yaxis = list(
    title = "Cumulative Read",
    zeroline = FALSE
  )
)
# Print the finished widget.
p
# Disabled draft: a single-book version of the trace built in the loop above,
# hard-coded to row 4. It is kept fully commented out so the script parses;
# the previous text ended in a dangling `plot_ly() %>%` followed by
# uncommented fragments.
# NOTE(review): before re-enabling, drop the extra `p` argument (the pipe
# already supplies the plot), and check `facet_col` — it does not appear to
# be an R plotly trace attribute, and `year_read` is a column of gr_read,
# not a standalone object.
#
# plot_ly() %>%
#   # for(i in 2:(nrow(gr_read) - 1)){
#   add_trace(p,
#     x = c(gr_read$date_start[4], gr_read$date_read[4]),
#     y = c(gr_read$rn[4], gr_read$rn[4]),
#     mode = "lines",
#     type = 'scatter',
#     line = list(width = 10),
#     facet_col = year_read,
#     showlegend = F,
#     hoverinfo = "text",
#     text = paste("Book:", gr_read$title[4], "<br>",
#                  "Start Date: ", gr_read$date_start[4], "<br>",
#                  "End Date:", gr_read$date_read[4], "<br>",
#                  "Duration: ", gr_read$duration_days[4], ' Days')
#     # evaluate = T
#   )